## R Script Output ------------------------------------------------------------
# Appendix Table D.2: Descriptive Statistics of 2017 Immigration Lobbying by NAICS 2-digit Industries


## Instructions ----------------------------------------------------------------
# Step 1: Set MAIN_DIR (in the "setup" section below) to the directory that contains README.txt
# Step 2: Run entire script


## IMPORTANT NOTE --------------------------------------------------------------
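# Appendix Table D.2 uses Orbis' proprietary data on firm-level 4-digit core NAICS 
# industry codes. To protect Orbis' proprietary data, this script loads saved summary 
# data aggregated at the 2-digit level. The anonymous summary data were created with 
# the commented-out code below, which is kept for reference only: it needs the main 
# dataset (data-merge-91-17.RData) and is not executed by this script.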
# 
# # load main dataset
# load(file = paste(MAIN_DIR, "data-merge-91-17.RData", sep = "/"))
# 
# # subset and create variables
# table.d2.data <- data.merge %>%
#   select(year,
#          bvd_id,
#          h1b_pet_firm_year_tot, 
#          l1_pet_firm_year_tot,
#          lob_img_2017, 
#          naics_core) %>%
#   filter(year == 2017) %>% 
#   mutate(petitioned = ifelse(h1b_pet_firm_year_tot > 0 | l1_pet_firm_year_tot > 0, 1, 0),
#          naics_2d = str_sub(naics_core, start = 1, end = 2),
#          naics_2d = ifelse(naics_2d == 31 | naics_2d == 32 | naics_2d == 33, "31-33", naics_2d),
#          naics_2d = ifelse(naics_2d == 44 | naics_2d == 45, "44-45", naics_2d),
#          naics_2d = ifelse(naics_2d == 48 | naics_2d == 49, "48-49", naics_2d)) 
# 
# # summarize
# table.d2.data.anonymous <- table.d2.data %>%
#   group_by(naics_2d) %>%
#   summarize(n_firms_tot = n_distinct(bvd_id),
#             n_firms_lob = sum(lob_img_2017, na.rm = TRUE)) %>%
#   ungroup() %>%
#   mutate(per_lob = round(n_firms_lob / n_firms_tot * 100, 1))  %>%
#   filter(!is.na(naics_2d)) %>%
#   filter(naics_2d != "99")
# 
# # save
# save(table.d2.data.anonymous, 
#      file = paste(MAIN_DIR, "Appendix-Table-D2-data-anonymous.RData", sep = "/"))


## setup -----------------------------------------------------------------------
# clean slate
# NOTE: this removes every object from the current workspace, so run the script
# in a fresh R session rather than in one that holds unsaved work.
rm(list = ls())
date()

# load packages
pkg <- c("tidyverse",
         "RColorBrewer",
         "gridExtra",
         "viridis",
         "xtable")

# library() stops immediately with a clear error when a package is not
# installed. require() would only return FALSE, letting the script continue and
# fail later with an unrelated-looking error (e.g. `%>%` not found).
# invisible() suppresses the list that lapply() returns.
invisible(lapply(pkg, library, character.only = TRUE))

# set main directory
MAIN_DIR <- "~/Dropbox/Research/JOP-h1b-replication"


## load data -------------------------------------------------------------------
# Read the saved 2-digit summary data from MAIN_DIR; file.path() joins the
# directory and file name with "/".
load(file = file.path(MAIN_DIR, "Appendix-Table-D2-data-anonymous.RData"))


## prepare data for table ------------------------------------------------------
# The three vectors below (naics.codes, naics.names, example.vec) are parallel:
# element i of each describes the same 2-digit NAICS industry. Labels are
# attached to the data by matching on the industry code rather than by row
# position, so a re-sorted or shortened summary table cannot be silently
# mislabelled.

# 2-digit NAICS codes (manufacturing, retail and transportation are pooled)
naics.codes <- c("11", "21", "22", "23", "31-33", "42", "44-45", "48-49",
                 "51", "52", "53", "54", "55", "56", "61", "62", "71", "72",
                 "81", "92")

# add 2-digit NAICS description
naics.names <- c("Agriculture, Forestry, Fishing, Hunting",
                 "Mining, Quarrying, and Oil/Gas Extraction",
                 "Utilities",
                 "Construction",
                 "Manufacturing",
                 "Wholesale Trade",
                 "Retail Trade",
                 "Transportation and Warehousing",
                 "Information",
                 "Finance and Insurance",
                 "Real Estate and Rental and Leasing",
                 "Professional, Scientific, Technical SVC",
                 "Management of Companies and Enterprises",
                 "Admin. Support, Waste Management, Remediation SVC",
                 "Educational Services",
                 "Health Care and Social Assistance",
                 "Arts, Entertainment, and Recreation",
                 "Accommodation and Food Services",
                 "Other Services (except Public Administration)",
                 "Public Administration")

# add example firms
example.vec <- c(
  # 11
  "LAND O'LAKES INC",

  # 21
  "OCCIDENTAL PETROLEUM CORP",

  # 22
  "PACIFIC GAS & ELECTRIC COMPANY",

  # 23
  "LENNAR CORP",

  # 31-33
  "APPLE INC",

  # 42
  "NU SKIN ENTERPRISES INC",

  # 44-45
  "WALMART INC",

  # 48-49
  "DELTA AIR LINES INC",

  # 51
  "MICROSOFT CORPORATION",

  # 52
  "THE WESTERN UNION CO",

  # 53
  "CBRE GROUP INC",

  # 54
  "ACCENTURE LLP",

  # 55
  "SVB FINANCIAL GROUP",

  # 56
  "EQUIFAX INC",

  # 61
  "SAS INSTITUTE INC",

  # 62
  "COMMUNITY HEALTH SYSTEMS INC",

  # 71
  "FELD ENTERTAINMENT INC",

  # 72
  "MCDONALDS CORP",

  # 81
  "DESALES MEDIA GROUP INC",

  # 92
  "PAUL HASTINGS LLP")

# the lookup vectors must stay aligned with each other
stopifnot(length(naics.names) == length(naics.codes),
          length(example.vec) == length(naics.codes))

# position of each data row's industry code in the lookup vectors
code.idx <- match(as.character(table.d2.data.anonymous$naics_2d), naics.codes)

# fail loudly instead of producing a table with missing or wrong labels
if (anyNA(code.idx)) {
  stop("No NAICS description available for code(s): ",
       paste(unique(table.d2.data.anonymous$naics_2d[is.na(code.idx)]),
             collapse = ", "),
       call. = FALSE)
}

# clean: attach labels, keep the table columns, and give them display names
naics.sum <- table.d2.data.anonymous %>%
  mutate(naics_2d_desc = naics.names[code.idx],
         example = example.vec[code.idx]) %>%
  select(naics_2d_desc, naics_2d,
         n_firms_tot, per_lob,
         example) %>%
  rename(NAICS = naics_2d_desc,
         Code = naics_2d,
         `# Firms` = n_firms_tot,
         `% Lobbied` = per_lob,
         `Example Firm` = example)


## save ------------------------------------------------------------------------
# Write the table through print.xtable()'s `file` argument rather than wrapping
# the call in sink() ... sink(). If xtable() or print() fails, an open sink
# would never be closed and all later console output would keep going to the
# file; writing via `file` has no such global side effect.
# To produce LaTeX instead of HTML, swap the commented-out `file` and `type`
# lines below (`size` only affects LaTeX output).
print(xtable(naics.sum,
             caption = "Descriptive Statistics of Immigration Lobbying by NAICS 2-digit Industries.",
             label = "tb:desc",
             digits = 1),
      #file = file.path(MAIN_DIR, "Appendix-Table-D2.tex"),
      file = file.path(MAIN_DIR, "Appendix-Table-D2.html"),
      size = "footnotesize",
      include.rownames = FALSE,
      #type = "latex",
      type = "html")

